# Project identity: the bot name, and the package that holds the spiders
# (the same package is used as the location for newly generated spiders).
BOT_NAME = "ximalaya_sqlite"

NEWSPIDER_MODULE = "ximalaya_scraper.spiders"
SPIDER_MODULES = [NEWSPIDER_MODULE]

# robots.txt handling: this crawler does NOT honor robots.txt rules.
ROBOTSTXT_OBEY = False

# Throttling: a base delay of 1 second between requests, with at most one
# request in flight overall and at most one per domain.
DOWNLOAD_DELAY = 1
CONCURRENT_REQUESTS = 1
CONCURRENT_REQUESTS_PER_DOMAIN = 1

# Downloader middlewares, keyed by import path. The integer is the priority
# Scrapy uses to order them; mapping a path to None disables that middleware.
DOWNLOADER_MIDDLEWARES = {
    # Scrapy's stock retry middleware is switched off here; the project's
    # CustomRetryMiddleware (priority 550) is enabled in its place.
    "scrapy.downloadermiddlewares.retry.RetryMiddleware": None,
    "ximalaya_scraper.middlewares.RandomUserAgentMiddleware": 543,
    "ximalaya_scraper.middlewares.ProxyMiddleware": 544,
    "ximalaya_scraper.middlewares.DelayMiddleware": 545,
    "ximalaya_scraper.middlewares.CustomRetryMiddleware": 550,
}

# Item pipelines: only the project's SQLitePipeline is enabled (priority 300).
ITEM_PIPELINES = {"ximalaya_scraper.pipelines.SQLitePipeline": 300}

# SQLite storage: path of the database file.
# Presumably read by SQLitePipeline -- verify against the pipeline code.
SQLITE_DB = "ximalaya_data.db"

# Logging: INFO level, written to a log file.
LOG_FILE = "ximalaya_spider.log"
LOG_LEVEL = "INFO"

# Crawl depth: limit link-following depth to 3.
DEPTH_LIMIT = 3

# Cookies: cookie handling is turned off.
COOKIES_ENABLED = False

# Retry policy: up to 3 retries for the listed HTTP status codes.
# NOTE(review): DOWNLOADER_MIDDLEWARES disables Scrapy's built-in
# RetryMiddleware in favor of CustomRetryMiddleware; confirm the custom
# middleware actually reads these RETRY_* settings.
RETRY_ENABLED = True
RETRY_TIMES = 3
RETRY_HTTP_CODES = [500, 502, 503, 504, 408, 429, 403]

# AutoThrottle: enabled, starting at a 1-second delay and capped at 10 seconds.
# NOTE(review): the target concurrency is 2.0 while this file sets
# CONCURRENT_REQUESTS_PER_DOMAIN to 1 -- confirm this combination is intended.
AUTOTHROTTLE_ENABLED = True
AUTOTHROTTLE_TARGET_CONCURRENCY = 2.0
AUTOTHROTTLE_START_DELAY = 1
AUTOTHROTTLE_MAX_DELAY = 10

# Fetch the proxy IP list from the project's proxy manager.
# NOTE: both the import and the get_proxies() call execute whenever this
# settings module is imported, so a failure in either one prevents the
# settings from loading.
# Presumably PROXY_LIST is consumed by ProxyMiddleware -- verify against
# the middleware code.
from proxy_manager import proxy_manager
PROXY_LIST = proxy_manager.get_proxies()

# Pool of browser User-Agent strings.
# Presumably RandomUserAgentMiddleware picks from this list -- verify
# against the middleware code.
USER_AGENTS = [
    # Chrome 91 on Windows 10
    (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/91.0.4472.124 Safari/537.36"
    ),
    # Chrome 91 on macOS 10.15.7
    (
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/91.0.4472.124 Safari/537.36"
    ),
    # Chrome 91 on Linux x86_64
    (
        "Mozilla/5.0 (X11; Linux x86_64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/91.0.4472.124 Safari/537.36"
    ),
    # Firefox 89 on Windows 10
    (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:89.0) "
        "Gecko/20100101 Firefox/89.0"
    ),
    # Safari 14.1.1 on macOS 10.15.7
    (
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
        "AppleWebKit/605.1.15 (KHTML, like Gecko) "
        "Version/14.1.1 Safari/605.1.15"
    ),
]